import pandas as pd
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Read the "Sheet1" worksheet of the workbook into a DataFrame.
df = pd.read_excel(
    io="RA_change reason.xlsx",
    sheet_name="Sheet1",
)
# Bare expression: echoes the frame when run as a notebook cell.
df
|  | cik | year | new_peers | old_peers | turnover_ratio | Quote | Category | Change | url | url_prev |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25445 | 2007 | 4 | 19 | 0.173913 | The Committee reviewed two potential peer grou... | revenues, net income, market capitalization, d... | NaN | https://www.sec.gov/Archives/edgar/data/25445/... | https://www.sec.gov/Archives/edgar/data/25445/... |
| 1 | 7536 | 2007 | 23 | 12 | 0.657143 | The list differs from the “Peer Group” used fo... | size and scope, publicly-traded, historical co... | NaN | https://www.sec.gov/Archives/edgar/data/7536/0... | https://www.sec.gov/Archives/edgar/data/7536/0... |
| 2 | 875320 | 2007 | 7 | 7 | 0.500000 | We select the companies for this comparator gr... | industry,operating expenses and market capital... | NaN | https://www.sec.gov/Archives/edgar/data/875320... | https://www.sec.gov/Archives/edgar/data/875320... |
| 3 | 310569 | 2007 | 10 | 34 | 0.227273 | Each year the Committee reviews and considers ... | base salary, target bonus, total cash, long-te... | NaN | https://www.sec.gov/Archives/edgar/data/310569... | https://www.sec.gov/Archives/edgar/data/310569... |
| 4 | 55242 | 2007 | 13 | 23 | 0.361111 | We included many of the companies in the peer ... | revenue, operational scope, organizational com... | NaN | https://www.sec.gov/Archives/edgar/data/55242/... | https://www.sec.gov/Archives/edgar/data/55242/... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 195 | 59527 | 2018 | 4 | 21 | 0.160000 | We use a peer group of publicly traded industr... | us-based, revenue, compete for talent, shareho... | NaN | https://www.sec.gov/Archives/edgar/data/59527/... | https://www.sec.gov/Archives/edgar/data/59527/... |
| 196 | 1103982 | 2018 | 3 | 25 | 0.107143 | In constructing our Compensation Survey Peer G... | Revenue, market capitalization, industry, mark... | NaN | https://www.sec.gov/Archives/edgar/data/110398... | https://www.sec.gov/Archives/edgar/data/110398... |
| 197 | 895419 | 2018 | 2 | 17 | 0.105263 | The Committee, assisted by Radford, selects Cr... | Business, size, revenue, market capitalization... | NaN | https://www.sec.gov/Archives/edgar/data/895419... | https://www.sec.gov/Archives/edgar/data/895419... |
| 198 | 915840 | 2018 | 1 | 11 | 0.083333 | These companies were chosen because, in additi... | size | acquired | https://www.sec.gov/Archives/edgar/data/915840... | https://www.sec.gov/Archives/edgar/data/915840... |
| 199 | 1057706 | 2018 | 4 | 9 | 0.307692 | Industry survey data was collected from bankin... | Asset size, compete for talent | NaN | https://www.sec.gov/Archives/edgar/data/105770... | https://www.sec.gov/Archives/edgar/data/105770... |
200 rows × 10 columns
# Concatenate all text in the 'Category' column into one space-separated string.
# Missing cells are dropped first: str(NaN) is "nan", which would otherwise be
# counted as a word and appear in the cloud.
text = " ".join(df['Category'].dropna().astype(str))

# Generate the word cloud from the combined text.
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    colormap='plasma',
    max_words=100,
    min_font_size=10,
).generate(text)

# Display the word cloud
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # No axis for cleaner visualization
plt.show()